Makeover of Data Visualization

The aim of this article is to critique the given data visualization of the Singapore Labour Force Participation Rate across age groups over the past 12 years, and to propose and build an improved visualization of the same data using the Tableau Desktop interface.

true
2022-02-20

R packages needed for the exercise

The following R packages provide the functions used for data preparation and data visualization in this exercise. The plotly package is used to create the animated and interactive plots in R.

# Packages used in this document. Any package that is not yet installed is
# installed first; every package is then attached in the order listed.
packages <- c(
  "knitr", "corrplot", "ggstatsplot", "plotly", "tidyverse",
  "heatmaply", "seriation", "dendextend", "reshape"
)
for (p in packages) {
  # requireNamespace() only checks availability without attaching; attaching
  # is left to library(), which stops with an error if the package still
  # cannot be loaded after the install attempt.
  if (!requireNamespace(p, quietly = TRUE)) {
    install.packages(p)
  }
  library(p, character.only = TRUE)
}

Data Preparation

The Starbucks drinks data set is imported from a CSV file, and the drink categories it contains are listed as follows:

# Read the Starbucks drinks data from the project's data folder.
starbucks_drink <- read_csv(file = "data/starbucks_drink.csv")
# List the distinct drink categories present in the data.
unique(starbucks_drink[["Category"]])
 [1] "iced-coffee"                   "refreshers"                   
 [3] "evolution-fresh"               "iced-tea"                     
 [5] "bottled-drinks"                "brewed-coffee"                
 [7] "espresso"                      "frappuccino-blended-beverages"
 [9] "kids-drinks-and-other"         "tea"                          
# Keep only the rows that belong to the "kids-drinks-and-other" category.
kids_drinks_and_other <- filter(
  starbucks_drink,
  Category == "kids-drinks-and-other"
)

# Distinct drink names within that category.
unique(kids_drinks_and_other[["Name"]])
[1] "Cinnamon Dolce Crème"         "Hot Chocolate"               
[3] "Pumpkin Spice Crème"          "Salted Caramel Hot Chocolate"
[5] "Steamed Apple Juice"          "Vanilla Crème"               
# Distinct milk options recorded for the kids drinks (NA included).
unique(kids_drinks_and_other[["Milk"]])
[1] "Almond"              "Coconut"             "Nonfat milk"        
[4] "Whole Milk"          "2% Milk"             "Soy (United States)"
[7] NA                   
# Distinct whipped-cream options recorded for the kids drinks (NA included).
unique(kids_drinks_and_other[["Whipped Cream"]])
[1] "No Whipped Cream" "Whipped Cream"    NA                
#summary(kids_drinks_and_other)
# Correlogram of the nutrition columns (columns 3 to 15) of the kids drinks.
# NOTE(review): `Caffeine(mg)` (column 15) is read in as character; confirm it
# is numeric at this point, otherwise it may be left out of the correlation.
# NOTE(review): the count of non-significant pairs in the subtitle is
# hard-coded; re-check it against the rendered plot whenever the data changes.
ggstatsplot::ggcorrmat(
  data = kids_drinks_and_other,
  cor.vars = 3:15,
  # Appearance options for the correlation tiles; outline.color belongs here
  # and only here (a second top-level copy was silently ignored).
  ggcorrplot.args = list(
    outline.color = "black",
    hc.order = TRUE,
    tl.cex = 14
  ),
  title = "Correlogram for Starbucks dataset",
  subtitle = "16 pairs are not significantly correlated at p < 0.05",
  colors = c("#CC6600", "white", "#000066"),
  ggtheme = theme_minimal()
)

spec_tbl_df [262 x 18] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ Category             : chr [1:262] "kids-drinks-and-other" "kids-drinks-and-other" "kids-drinks-and-other" "kids-drinks-and-other" ...
 $ Name                 : chr [1:262] "Cinnamon Dolce Crème" "Cinnamon Dolce Crème" "Cinnamon Dolce Crème" "Cinnamon Dolce Crème" ...
 $ Portion(fl oz)       : num [1:262] 12 12 12 12 12 12 12 12 12 12 ...
 $ Calories             : num [1:262] 140 210 170 230 170 230 240 310 210 280 ...
 $ Calories from fat    : num [1:262] 45 100 50 110 0 60 80 140 50 100 ...
 $ Total Fat(g)         : num [1:262] 5 11 6 12 0 6 9 15 6 12 ...
 $ Saturated fat(g)     : num [1:262] 0 4 5 9 0 4 5 9 3.5 7 ...
 $ Trans fat(g)         : num [1:262] 0 0 0 0 0 0 0 0 0 0 ...
 $ Cholesterol(mg)      : num [1:262] 0 20 0 20 5 25 30 45 25 40 ...
 $ Sodium(mg)           : num [1:262] 120 125 130 135 120 125 120 125 135 140 ...
 $ Total Carbohydrate(g): num [1:262] 25 27 28 30 32 34 32 34 32 34 ...
 $ Dietary Fiber(g)     : num [1:262] 1 1 0 1 0 0 0 0 0 0 ...
 $ Sugars(g)            : num [1:262] 22 25 26 28 31 33 31 33 31 33 ...
 $ Protein(g)           : num [1:262] 2 2 1 1 10 10 9 9 9 10 ...
 $ Caffeine(mg)         : chr [1:262] "0" "0" "0" "0" ...
 $ Size                 : chr [1:262] "Tall" "Tall" "Tall" "Tall" ...
 $ Milk                 : chr [1:262] "Almond" "Almond" "Coconut" "Coconut" ...
 $ Whipped Cream        : chr [1:262] "No Whipped Cream" "Whipped Cream" "No Whipped Cream" "Whipped Cream" ...
 - attr(*, "spec")=
  .. cols(
  ..   Category = col_character(),
  ..   Name = col_character(),
  ..   `Portion(fl oz)` = col_double(),
  ..   Calories = col_double(),
  ..   `Calories from fat` = col_double(),
  ..   `Total Fat(g)` = col_double(),
  ..   `Saturated fat(g)` = col_double(),
  ..   `Trans fat(g)` = col_double(),
  ..   `Cholesterol(mg)` = col_double(),
  ..   `Sodium(mg)` = col_double(),
  ..   `Total Carbohydrate(g)` = col_double(),
  ..   `Dietary Fiber(g)` = col_double(),
  ..   `Sugars(g)` = col_double(),
  ..   `Protein(g)` = col_double(),
  ..   `Caffeine(mg)` = col_character(),
  ..   Size = col_character(),
  ..   Milk = col_character(),
  ..   `Whipped Cream` = col_character()
  .. )
 - attr(*, "problems")=<externalptr> 
Portion(fl oz) Calories Calories from fat Total Fat(g) Saturated fat(g) Trans fat(g)
Min. : 8.00 Min. : 90.0 Min. : 0.00 Min. : 0.000 Min. : 0.000 Min. :0.00000
1st Qu.: 8.00 1st Qu.:192.5 1st Qu.: 50.00 1st Qu.: 6.000 1st Qu.: 2.000 1st Qu.:0.00000
Median :12.00 Median :270.0 Median : 80.00 Median : 9.000 Median : 4.750 Median :0.00000
Mean :13.51 Mean :283.8 Mean : 83.24 Mean : 9.177 Mean : 5.149 Mean :0.03053
3rd Qu.:16.00 3rd Qu.:350.0 3rd Qu.:110.00 3rd Qu.:12.750 3rd Qu.: 7.000 3rd Qu.:0.00000
Max. :24.00 Max. :650.0 Max. :220.00 Max. :24.000 Max. :15.000 Max. :0.50000
Cholesterol(mg) Sodium(mg) Total Carbohydrate(g) Dietary Fiber(g) Sugars(g) Protein(g) Caffeine(mg)
Min. : 0.00 Min. : 10.0 Min. :14.00 Min. :0.00 Min. :12.00 Min. : 0.000 Min. : 0.00
1st Qu.: 0.00 1st Qu.:115.0 1st Qu.:28.00 1st Qu.:0.00 1st Qu.:25.00 1st Qu.: 4.000 1st Qu.: 0.00
Median :20.00 Median :160.0 Median :39.50 Median :1.00 Median :37.00 Median : 7.000 Median : 0.00
Mean :21.49 Mean :172.4 Mean :42.47 Mean :1.79 Mean :38.67 Mean : 8.237 Mean : 10.25
3rd Qu.:30.00 3rd Qu.:210.0 3rd Qu.:53.00 3rd Qu.:3.00 3rd Qu.:48.75 3rd Qu.:12.000 3rd Qu.: 20.00
Max. :75.00 Max. :460.0 Max. :99.00 Max. :7.00 Max. :85.00 Max. :19.000 Max. :225.00
# Rows without a milk or whipped-cream entry get an explicit label in place
# of NA.
kids_drinks_and_other$Milk <- replace(
  kids_drinks_and_other$Milk,
  is.na(kids_drinks_and_other$Milk),
  "No Milk"
)
kids_drinks_and_other$`Whipped Cream` <- replace(
  kids_drinks_and_other$`Whipped Cream`,
  is.na(kids_drinks_and_other$`Whipped Cream`),
  "No Whipped Cream"
)
# Build one text label per row from drink name, milk, whipped cream and size.
kids_drinks_and_other$Kids_drinks <- with(
  kids_drinks_and_other,
  paste(Name, "_", Milk, "_", `Whipped Cream`, "-", Size)
)
# Count rows that are exact duplicates of an earlier row.
sum(duplicated(kids_drinks_and_other))
[1] 0
# Coerce the nutrition columns (3 to 15) to numeric; `Caffeine(mg)` is read in
# as character. lapply() always returns a list of columns, whereas the return
# type of sapply() depends on its input.
kids_drinks_and_other[, 3:15] <- lapply(
  kids_drinks_and_other[, 3:15],
  as.numeric
)

# Keep the drink label (column 19, Kids_drinks) followed by the twelve
# nutrition measures in columns 4 to 15.
kids <- dplyr::select(kids_drinks_and_other, c(19, 4:15))
# Work on a plain data frame from here on: setting row names on a tibble is
# deprecated, and the row names are needed as labels on the matrix.
kids <- as.data.frame(kids)
# Standardise each measure (centre and scale) so that no single column
# dominates the Euclidean distances.
kids[, 2:13] <- scale(kids[, 2:13])
row.names(kids) <- kids$Kids_drinks

# The label column stays in position 1 of the matrix; it is dropped wherever
# the numeric values are used.
kidsdrinks_matrix <- data.matrix(kids)
wh_d <- dist(kidsdrinks_matrix[, -1], method = "euclidean")
# Third element of the result: the table comparing hierarchical clustering
# methods by their `optim` score.
dend_expend(wh_d)[[3]]
  dist_methods hclust_methods     optim
1      unknown         ward.D 0.6015018
2      unknown        ward.D2 0.6644386
3      unknown         single 0.6790721
4      unknown       complete 0.7399215
5      unknown        average 0.8008273
6      unknown       mcquitty 0.6972242
7      unknown         median 0.4415643
8      unknown       centroid 0.7779656

# Interactive cluster heatmap of the nutrition measures. The first matrix
# column (the drink label) is left out, rows are cut into 7 clusters, and
# Colv = NA is passed for the column dendrogram.
# NOTE(review): "mcquitty" is not the method with the highest `optim` value in
# the dend_expend() table; confirm this choice is intentional.
layout(
  heatmaply(
    kidsdrinks_matrix[, -1],
    Colv = NA,
    dist_method = "euclidean",
    hclust_method = "mcquitty",
    k_row = 7,
    fontsize_row = 2.5,
    fontsize_col = 5,
    colors = Blues
  ),
  height = 800,
  width = 600
)